In [1]:
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import tensorflow as tf
from keras import optimizers
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Flatten, Dense, Conv2D, MaxPooling2D
from keras.layers import Dropout, BatchNormalization, LeakyReLU, Activation
from keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical







In [2]:
# FER-2013: the facial-expression dataset from the 2013 Kaggle competition.
df = pd.read_csv(filepath_or_buffer='fer2013.csv')
print(df.shape)
df.head(5)

(35887, 3)


Unnamed: 0,emotion,pixels,Usage
0,0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,Training
1,0,151 150 147 155 148 133 111 140 170 174 182 15...,Training
2,2,231 212 156 164 174 138 161 173 182 200 106 38...,Training
3,4,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,Training
4,6,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,Training


Emotion labels: 0 anger 生气； 1 disgust 厌恶； 2 fear 恐惧； 3 happiness 开心； 4 sadness 伤心； 5 surprise 惊讶； 6 neutral 中性

In [3]:
# Human-readable names for the integer emotion codes in the dataset.
emotion_label_to_text = {
    0: 'anger',
    1: 'disgust',
    2: 'fear',
    3: 'happiness',
    4: 'sadness',
    5: 'surprise',
    6: 'neutral',
}
# Only these classes are kept; codes 2 (fear) and 5 (surprise) are dropped.
INTERESTED_LABELS = [0, 1, 3, 4, 6]
df = df[df['emotion'].isin(INTERESTED_LABELS)]
df.shape

(26764, 3)

In [4]:
def _pixels_to_image(pixel_string):
    """Convert one space-separated pixel string into a (48, 48, 1) float array.

    The trailing axis of size 1 is the single (grayscale) channel.
    """
    return np.array(pixel_string.split(' ')).reshape(48, 48, 1).astype('float')


# One (48, 48, 1) array per row of df.pixels; the result is still a pandas Series.
img_array = df['pixels'].apply(_pixels_to_image)
img_array

0        [[[70.0], [80.0], [82.0], [72.0], [58.0], [58....
1        [[[151.0], [150.0], [147.0], [155.0], [148.0],...
3        [[[24.0], [32.0], [36.0], [30.0], [32.0], [23....
4        [[[4.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0...
6        [[[20.0], [17.0], [19.0], [21.0], [25.0], [38....
                               ...                        
35881    [[[181.0], [177.0], [176.0], [156.0], [178.0],...
35882    [[[50.0], [36.0], [17.0], [22.0], [23.0], [29....
35883    [[[178.0], [174.0], [172.0], [173.0], [181.0],...
35884    [[[17.0], [17.0], [16.0], [23.0], [28.0], [22....
35885    [[[30.0], [28.0], [28.0], [29.0], [31.0], [30....
Name: pixels, Length: 26764, dtype: object

In [5]:
# Stack the per-image arrays into one 4-D array of shape (N, 48, 48, 1),
# where N is the number of images, and divide every pixel value by 255.
# NOTE: img_array is reassigned from itself, so re-running this cell on its
# own divides by 255 a second time; re-run the previous cell first.
img_array = np.stack(list(img_array), axis=0) / 255
img_array

array([[[[0.2745098 ],
         [0.31372549],
         [0.32156863],
         ...,
         [0.20392157],
         [0.16862745],
         [0.16078431]],

        [[0.25490196],
         [0.23921569],
         [0.22745098],
         ...,
         [0.21960784],
         [0.20392157],
         [0.17254902]],

        [[0.19607843],
         [0.16862745],
         [0.21176471],
         ...,
         [0.19215686],
         [0.21960784],
         [0.18431373]],

        ...,

        [[0.35686275],
         [0.25490196],
         [0.16470588],
         ...,
         [0.28235294],
         [0.21960784],
         [0.16862745]],

        [[0.30196078],
         [0.32156863],
         [0.30980392],
         ...,
         [0.41176471],
         [0.2745098 ],
         [0.18039216]],

        [[0.30196078],
         [0.28235294],
         [0.32941176],
         ...,
         [0.41568627],
         [0.42745098],
         [0.32156863]]],


       [[[0.59215686],
         [0.58823529],
         [0.57

In [6]:
# Sanity check of the stacked image tensor: (N, 48, 48, 1).
np.shape(img_array)

(26764, 48, 48, 1)

In [7]:
# Encode the remaining emotion codes as consecutive integers 0..K-1
# (LabelEncoder), then one-hot encode them, giving one column per kept class.
# `to_categorical` is imported in the notebook's import cell.
le = LabelEncoder()
img_labels = to_categorical(le.fit_transform(df['emotion']))
img_labels

array([[1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0.],
       ...,
       [0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0.]], dtype=float32)

In [8]:
# Hold out 20% of the samples, stratified on the labels so both splits keep
# the same class proportions; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    img_array,
    img_labels,
    test_size=0.2,
    shuffle=True,
    random_state=42,
    stratify=img_labels,
)
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

(21411, 48, 48, 1)
(5353, 48, 48, 1)
(21411, 5)
(5353, 5)


In [9]:
# Per-image dimensions are axes 1..3 of the (N, 48, 48, 1) training tensor.
# They are later passed to the first Conv2D layer in this same axis order.
img_width, img_height, img_depth = X_train.shape[1:]
# Number of one-hot columns == number of emotion classes.
num_classes = y_train.shape[1]

In [10]:
def build_net(optim='adam'):
    """Build, compile and summarise the 'DCNN' emotion classifier.

    Architecture, in order:
      * Conv2D 5x5 (256 filters) + BatchNormalization,
        Conv2D 5x5 (128 filters) + BatchNormalization,
        2x2 max-pooling, Dropout(0.4);
      * two Conv2D 3x3 (128 filters) + BatchNormalization stages,
        2x2 max-pooling, Dropout(0.4);
      * Flatten -> Dense(128) + BatchNormalization + Dropout(0.6)
        -> softmax output layer with ``num_classes`` units.

    All hidden conv/dense layers use the ELU activation and the
    ``he_normal`` initializer; convolutions use ``padding='same'``.

    The input shape and the number of classes are read from the
    notebook-level variables ``img_width``, ``img_height``, ``img_depth``
    and ``num_classes``, which must be defined before this is called.

    Args:
        optim: Optimizer used to compile the model, either a Keras optimizer
            instance or an optimizer name string. ``None`` means ``'adam'``.
            A list/tuple is not a valid optimizer; for backward
            compatibility with callers that pass one, a warning is emitted
            and ``'adam'`` is used instead.

    Returns:
        The compiled ``Sequential`` model (its summary is printed as a side
        effect).
    """
    # Resolve the optimizer actually handed to compile().
    if optim is None:
        optimizer = 'adam'
    elif isinstance(optim, (list, tuple)):
        warnings.warn(
            "build_net expects a single optimizer but received a %s; "
            "falling back to 'adam'. Pass one optimizer instance or name."
            % type(optim).__name__,
            stacklevel=2,
        )
        optimizer = 'adam'
    else:
        optimizer = optim

    net = Sequential(name='DCNN')

    def add_conv_stage(index, filters, kernel_size, **conv_kwargs):
        # One convolution followed by batch normalization; layer names are
        # derived from `index` so they match conv2d_<n> / batchnorm_<n>.
        net.add(Conv2D(filters=filters, kernel_size=kernel_size, activation='elu',
                       padding='same', kernel_initializer='he_normal',
                       name=f'conv2d_{index}', **conv_kwargs))
        net.add(BatchNormalization(name=f'batchnorm_{index}'))

    # Block 1: two 5x5 convolutions; only the first layer declares the input shape.
    add_conv_stage(1, 256, (5, 5), input_shape=(img_width, img_height, img_depth))
    add_conv_stage(2, 128, (5, 5))
    net.add(MaxPooling2D(pool_size=(2, 2), name='maxpool2d_1'))
    net.add(Dropout(0.4, name='dropout_1'))

    # Block 2: two 3x3 convolutions.
    add_conv_stage(3, 128, (3, 3))
    add_conv_stage(4, 128, (3, 3))
    net.add(MaxPooling2D(pool_size=(2, 2), name='maxpool2d_2'))
    net.add(Dropout(0.4, name='dropout_2'))

    # A third block (conv2d_5/6 with 256 filters, maxpool2d_3, dropout_3 at 0.5)
    # was previously disabled; the numbering of the layers below
    # (batchnorm_7, dropout_4) still leaves room for it.

    # Classifier head.
    net.add(Flatten(name='flatten'))
    net.add(Dense(128, activation='elu', kernel_initializer='he_normal', name='dense_1'))
    net.add(BatchNormalization(name='batchnorm_7'))
    net.add(Dropout(0.6, name='dropout_4'))
    net.add(Dense(num_classes, activation='softmax', name='out_layer'))

    # Multi-class cross-entropy on one-hot labels, tracked with accuracy.
    net.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    net.summary()

    return net

In [11]:
# Stop training once validation accuracy has not improved by at least
# min_delta for `patience` epochs, then restore the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.00005,
    patience=10,
    verbose=1,
    restore_best_weights=True,
)

# Halve the learning rate when validation accuracy plateaus for 7 epochs,
# never going below min_lr.
lr_scheduler = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=7,
    min_lr=1e-7,
    verbose=1,
)

callbacks = [early_stopping, lr_scheduler]

In [12]:
# Image augmentation for the training set, intended to reduce overfitting:
# random rotation, horizontal/vertical shifts, shear, zoom and horizontal flips.
train_datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.15,
    height_shift_range=0.15,
    shear_range=0.15,
    zoom_range=0.15,
    horizontal_flip=True,
)
train_datagen.fit(X_train)

In [13]:
batch_size = 64
epochs = 24

# Candidate optimizers. A model is compiled with exactly one optimizer, so a
# single entry is selected below rather than passing the whole list.
optims = [
    optimizers.Nadam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7, name='Nadam'),
    optimizers.Adam(0.001),
]
model = build_net(optims[1])

# steps_per_epoch must be a whole number of batches; round up so the final,
# partially filled batch is still consumed each epoch.
steps_per_epoch = int(np.ceil(len(X_train) / batch_size))

# NOTE(review): the held-out test split doubles as validation data here, so
# early stopping and LR scheduling are tuned on it; consider carving a
# separate validation split out of the training data.
history = model.fit(
    train_datagen.flow(X_train, y_train, batch_size=batch_size),
    validation_data=(X_test, y_test),
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    callbacks=callbacks,
)




Model: "DCNN"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_1 (Conv2D)           (None, 48, 48, 256)       6656      
                                                                 
 batchnorm_1 (BatchNormaliz  (None, 48, 48, 256)       1024      
 ation)                                                          
                                                                 
 conv2d_2 (Conv2D)           (None, 48, 48, 128)       819328    
                                                                 
 batchnorm_2 (BatchNormaliz  (None, 48, 48, 128)       512       
 ation)                                                          
                                                                 
 maxpool2d_1 (MaxPooling2D)  (None, 24, 24, 128)       0         
                                                                 
 dropout_1 (Dropout)         (None, 24, 24, 128)       0   

In [14]:
# Persist the trained model to an HDF5 (.h5) file in the working directory.
model.save(filepath='emotion_recognition_model.h5')

  saving_api.save_model(
