In [1]:
import torch.nn as nn
import torch.nn.functional as F
class DCNN(nn.Module):
    def __init__(self, img_depth, num_classes):
        super(DCNN, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=img_depth, out_channels=256, kernel_size=5, padding=2)
        self.batchnorm1 = nn.BatchNorm2d(256)
        
        self.conv2 = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=5, padding=2)
        self.batchnorm2 = nn.BatchNorm2d(128)
        
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.dropout1 = nn.Dropout(0.4)
        
        self.conv3 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)
        self.batchnorm3 = nn.BatchNorm2d(128)
        
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)
        self.batchnorm4 = nn.BatchNorm2d(128)
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        self.dropout2 = nn.Dropout(0.4)
        
        self.conv5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.batchnorm5 = nn.BatchNorm2d(256)
        
        self.conv6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        self.batchnorm6 = nn.BatchNorm2d(256)
        self.pool3 = nn.MaxPool2d(kernel_size=2)
        self.dropout3 = nn.Dropout(0.5)
        
        # Flattening the output for the dense layer
        self.flatten = nn.Flatten()
        # 正确计算展平后的尺寸
        self.dense1 = nn.Linear(256 * 6 * 6, 128)  # Adjusted size after pooling
        self.batchnorm7 = nn.BatchNorm1d(128)
        self.dropout4 = nn.Dropout(0.6)
        
        self.out_layer = nn.Linear(128, num_classes)

    def forward(self, x):
        x = F.elu(self.batchnorm1(self.conv1(x)))
        x = F.elu(self.batchnorm2(self.conv2(x)))
        x = self.dropout1(self.pool1(x))
        
        x = F.elu(self.batchnorm3(self.conv3(x)))
        x = F.elu(self.batchnorm4(self.conv4(x)))
        x = self.dropout2(self.pool2(x))
        
        x = F.elu(self.batchnorm5(self.conv5(x)))
        x = F.elu(self.batchnorm6(self.conv6(x)))
        x = self.dropout3(self.pool3(x))
        
        x = self.flatten(x)
        x = F.elu(self.batchnorm7(self.dense1(x)))
        x = self.dropout4(x)
        x = self.out_layer(x)
        return x  # 返回 logits 用于计算交叉熵损失

In [2]:
import cv2
import torch as t

device = t.device("cuda" if t.cuda.is_available() else "cpu")

# 加载Haar特征的级联分类器，用于面部检测
detection_model_path = './haarcascade_files/haarcascade_frontalface_default.xml'
emotion_model_path = './models/face_emotion_rec_model_5.pth'
face_detection = cv2.CascadeClassifier(detection_model_path) # 人脸检测模型

# 加载情绪识别模型
emotion_classifier = t.load(emotion_model_path, map_location= device)
EMOTIONS = ['disgust',  'happiness', 'sadness', 'surprise',  'neutral']

In [3]:
def emotion_testing(test_img):
    
    # 定义一个变量来控制是否显示当前帧
    predicted_emotion = None
        
    # 照片转化为RGB灰度图
    gray_img= cv2.cvtColor(test_img, cv2.COLOR_BGR2GRAY)

    # 检测
    faces_detected = face_detection.detectMultiScale(gray_img, 1.32, 5)

    for (x,y,w,h) in faces_detected:
        # 裁剪人脸并调整格式
        cv2.rectangle(test_img,(x,y),(x+w,y+h),(255,0,0),thickness=3)
        roi_gray=gray_img[y:y+w,x:x+h]
        
        # 转化为48 x 48的灰度图并转化为像素点
        roi_gray=cv2.resize(roi_gray,(48,48))
        img_pixels = np.expand_dims(roi_gray, axis=2)
        
        # 提升维度，将值的范围控制到0~1
        img_pixels = np.expand_dims(img_pixels, axis = 0)
        img_pixels = img_pixels.astype(np.float32)
        img_pixels /= 255
        
        img_pixels = t.from_numpy(img_pixels)
        img_pixels = img_pixels.permute(0, 3, 1, 2)  # Reorder from NHWC to NCHW
        print(f'img_pixels:{img_pixels.shape}')
        # 预测情绪
        # 前向传播得到输出
        with torch.no_grad():  # 关闭梯度计算
            output = emotion_classifier(img_pixels.to(device))
            print(f'output:{F.softmax(output, dim=1)}')
            _, prediction = torch.max(F.softmax(output, dim=1), 1)
        
        predicted_emotion = EMOTIONS[prediction]
        
        cv2.putText(test_img, predicted_emotion, (int(x), int(y)), cv2.FONT_HERSHEY_SIMPLEX, 3, (0,255,0), 3)
        resized_img = cv2.resize(test_img, (0, 0), fx=0.25, fy=0.25, interpolation=cv2.INTER_NEAREST)
        
    return predicted_emotion, resized_img

In [4]:
# 找到对应的情绪
from pywebio.input import file_upload
from pywebio.output import *
from pywebio import start_server
import asyncio
import nest_asyncio
from PIL import Image
import io
import torch
import numpy as np
import time

def upload_and_predict():
    # 文件上传
    file = file_upload("Upload a photo to recognize your emotion", accept="image/*")
    if file is not None:
        # 预处理图片
        image = Image.open(io.BytesIO(file['content']))
        image = np.array(image)
        
        emotion_word, resized_img = emotion_testing(image)
        print(f'Emotion detected is {emotion_word}')
        image_pil = Image.fromarray(resized_img)
        with use_scope('playlist', clear = True):
            put_image(image_pil)
            put_markdown('Wait for a moment......')
        time.sleep(3)
        with use_scope('playlist', clear = True):
            put_text('playlist')

if __name__ == '__main__':
    # 启动 PyWebIO 应用
    nest_asyncio.apply()
    asyncio.create_task(start_server(upload_and_predict, port=8080, debug=True, notebook=True))

Running on all addresses.
Use http://10.80.43.30:8080/ to access the application
img_pixels:torch.Size([1, 1, 48, 48])
output:tensor([[4.7055e-04, 5.8363e-01, 1.9249e-02, 1.1637e-02, 3.8501e-01]],
       device='cuda:0')
Emotion detected is happiness


KeyboardInterrupt: 