In [None]:
# import packages
import cv2
import os
import glob 
import numpy as np
import torchvision
import torch
import torch.nn as nn
import torch.nn.functional as F
import tensorflow as tf
from torch.autograd import Variable
import torch.utils.data as Data
from matplotlib import pyplot as plt
from torchvision import transforms, models
from GPUtil import showUtilization as gpu_usage
from numba import cuda
from PIL import Image
from copy import copy
import splitfolders
from torchsummary import summary
# import scheduler for learning rate change
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.cuda.amp import GradScaler, autocast
import pickle
from flask import Flask, render_template, Response, request
import mediapipe as mp
import time
import numpy

In [None]:
# A notebook cannot import a class from project.ipynb, so the ensemble definition is copied
# here. It has to be in scope before torch.load() unpickles the saved ensemble model.
class MyEnsemble(nn.Module):
    """Ensemble that concatenates the features of four backbones and classifies them.

    Every backbone passed in has its ``fc`` attribute replaced by ``nn.Identity`` so that
    it emits features instead of class scores. The flattened outputs of ``model1`` ..
    ``model4`` are concatenated and fed to a three-layer MLP head.

    Args:
        model1, model2, model3, model4: backbone modules; each must accept the same
            input batch. Their flattened outputs must add up to 4110 features, the
            input width hard-coded in the classifier head.
        num_classes: number of output classes of the head (default 7).
        model5: optional fifth backbone. It is stored and stripped of its ``fc`` layer
            like the others, but ``forward`` does not use it.
    """

    def __init__(self, model1, model2, model3, model4, num_classes=7, model5=None):
        super(MyEnsemble, self).__init__()
        self.model1 = model1
        self.model2 = model2
        self.model3 = model3
        self.model4 = model4
        # model5 used to be read from an undefined name (NameError at construction);
        # it is now an optional trailing argument so existing call sites keep working.
        self.model5 = model5

        # Remove the last linear layer of every backbone that was supplied.
        for backbone in (model1, model2, model3, model4, model5):
            if backbone is not None:
                backbone.fc = nn.Identity()

        # New classifier head. The layer order is unchanged so that state-dict keys
        # (classifier.0 / classifier.2 / classifier.4) stay the same.
        self.classifier = nn.Sequential(
            nn.Linear(4110, 4096),
            nn.LeakyReLU(),
            nn.Linear(4096, 1024),
            nn.LeakyReLU(),
            nn.Linear(1024, num_classes),  # previously hard-coded to 7, ignoring num_classes
        )

    def forward(self, x):
        """Return the head's output for batch ``x``, shaped ``(batch, num_classes)``."""
        features = []
        for position, backbone in enumerate((self.model1, self.model2, self.model3, self.model4)):
            # The first backbone gets a clone to make sure x is not changed by in-place ops.
            backbone_input = x.clone() if position == 0 else x
            backbone_output = backbone(backbone_input)
            features.append(backbone_output.view(backbone_output.size(0), -1))

        x = torch.cat(features, dim=1)
        return self.classifier(F.relu(x))


In [None]:
# Streaming approach adapted from:
# https://pyimagesearch.com/2019/09/02/opencv-stream-video-to-web-browser-html-page/
import os
# NOTE(review): absolute, machine-specific path; the relative file names below depend on it.
os.chdir("C:/Users/jixua/OneDrive/Desktop/Machine learning package/Web")

# Choose the device first so the checkpoint can be mapped onto it while loading.
# is_available has to be *called*: the bare function object is always truthy, which
# previously selected 'cuda' even on machines without a GPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# torch.load unpickles arbitrary Python objects, so only load checkpoints you trust.
# NOTE(review): recent PyTorch releases default to weights_only=True, which rejects a
# fully pickled module; confirm against the installed version.
emotion_model = torch.load('ensemble_model4_tmax150', map_location=device)
emotion_model = emotion_model.to(device)

app = Flask(__name__, template_folder = './templates')
camera = cv2.VideoCapture(0)

# Module-level state shared with the streaming generators and the /requests route.
# (`global` statements are no-ops at module scope, so plain assignments are used.)
switch = 0   # 1 while streaming is enabled, 0 while stopped
img = None   # most recent camera frame, written by the streaming generators

#normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# Grayscale -> resize to 48 -> ten 40x40 crops -> one stacked tensor of the ten crops.
test_image_transform = transforms.Compose([
    transforms.Grayscale(1),
    transforms.Resize(48),
    transforms.TenCrop(40),
    transforms.Lambda(lambda crops : torch.stack([transforms.ToTensor()(crop) for crop in crops])),
])

#data_dir = 'C:/Users/jixua/OneDrive/Desktop/Machine learning package'
#test_set = torchvision.datasets.ImageFolder(data_dir + '/archive/test', transform = valid_image_transform)
# Class names indexed by the emotion model's output position.
labels_class = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

In [None]:
# real-time-detection using Google mediapipe
def real_time_detection(model):
    """Yield MJPEG parts of webcam frames annotated with face boxes and emotion labels.

    Faces are located with MediaPipe face detection; each face is cropped, run through
    ``test_image_transform`` (ten crops) and classified by ``model``. The ten crop
    outputs are averaged before the softmax, and the top class is drawn above the box.

    The generator runs while the module-level ``switch`` equals 1. When it stops (or if
    ``switch`` is not 1 on entry) the shared ``camera`` is released.

    Args:
        model: emotion classifier called on a ``(ncrops, c, h, w)`` batch. It is put
            into eval mode here because it is only used for inference.

    Yields:
        bytes: one ``--frame`` multipart chunk containing a JPEG-encoded frame.
    """
    global img

    if switch == 1:
        prev_time = 0
        face_detector = mp.solutions.face_detection.FaceDetection(0.6)
        model.eval()

        while switch == 1:
            success, frame = camera.read()
            if not success or frame is None:
                # The camera may be released or not ready yet; skip instead of passing
                # None to cvtColor, and avoid spinning at full speed while waiting.
                time.sleep(0.01)
                continue
            img = frame

            frame_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            results = face_detector.process(frame_rgb)

            if results.detections:
                ih, iw = img.shape[:2]
                # Crop from the clean RGB copy: the displayed frame is BGR and gets
                # boxes/text drawn on it, neither of which should reach the classifier.
                frame_pil = Image.fromarray(frame_rgb)

                for detection in results.detections:
                    rel_box = detection.location_data.relative_bounding_box
                    x, y = int(rel_box.xmin * iw), int(rel_box.ymin * ih)
                    w, h = int(rel_box.width * iw), int(rel_box.height * ih)
                    cv2.rectangle(img, (x, y, w, h), (255, 0, 255), 2)

                    # PIL's crop takes (left, upper, right, lower), not (x, y, w, h).
                    # Clamp to the frame and skip boxes that end up empty.
                    left, top = max(x, 0), max(y, 0)
                    right, bottom = min(x + w, iw), min(y + h, ih)
                    if right <= left or bottom <= top:
                        continue

                    face = frame_pil.crop((left, top, right, bottom)).resize((48, 48))
                    crops = test_image_transform(face).to(device)  # (ncrops, c, h, w)

                    with torch.no_grad():
                        logits = model(crops)
                        # Average the per-crop outputs, then turn them into probabilities.
                        mean_logits = logits.view(logits.size(0), -1).mean(dim=0)
                        class_probs = torch.nn.functional.softmax(mean_logits, dim=0)
                    best_prob, best_idx = class_probs.max(dim=0)

                    cv2.putText(img, '{}. probability:{:.3f}'.format(labels_class[best_idx.item()], best_prob.item()),
                                (x, y - 20), cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 255), 2)

            now = time.time()
            elapsed = now - prev_time
            fps = 1 / elapsed if elapsed > 0 else 0  # guard against a zero time delta
            prev_time = now
            cv2.putText(img, f'FPS: {int(fps)}', (20, 70), cv2.FONT_HERSHEY_PLAIN, 3, (0, 255, 0), 2)

            (flag, encodedImage) = cv2.imencode(".jpg", img)
            if not flag:
                continue

            yield(b'--frame\r\n' b'Content-Type: image/jpeg\r\n\r\n' +
                encodedImage.tobytes() + b'\r\n')

    camera.release()
    cv2.destroyAllWindows()

In [None]:
# real-time detection using YOLO
# Switch to the YOLO folder, presumably so the local `model` and `util` modules below
# can be imported from the working directory.
# NOTE(review): absolute, machine-specific path.
os.chdir("C:/Users/jixua/OneDrive/Desktop/Machine learning package/YOLO")
import torch
# Bind `plt` to pyplot, matching the import at the top of the notebook; the previous
# `import matplotlib as plt` silently rebound `plt` to the top-level package.
from matplotlib import pyplot as plt
# Yolo is not referenced by name in this cell; presumably it must be importable for
# torch.load('model.pth') to unpickle the detector - confirm.
from model import Yolo
from util import (get_result, input_image, plot_image)
import cv2
from PIL import Image


def to_xyxy(bbox):
    """Convert the first detection from centre/size form to corner form.

    Each entry of ``bbox`` is indexed as
    ``[class_num, chances, xcenter, ycenter, width, height]``; only entry 0 is used.

    Returns:
        ``(x1, y1, x2, y2)`` for the first entry, or ``(0, 0, 0, 0)`` when ``bbox``
        is empty.
    """
    # Guard clause: nothing to convert.
    if len(bbox) == 0:
        return 0, 0, 0, 0

    first = bbox[0]
    center_x, center_y = first[2], first[3]
    half_width, half_height = first[4] / 2, first[5] / 2
    return (center_x - half_width, center_y - half_height,
            center_x + half_width, center_y + half_height)
    
    
def yolo_detection(emotion_model):
    """Yield MJPEG parts of webcam frames annotated with a YOLO face box and emotion label.

    A detector is loaded from ``model.pth`` and run on every frame. The first box
    reported by ``get_result`` is converted with ``to_xyxy``, drawn on the frame, cropped
    and classified by ``emotion_model`` using ``test_image_transform`` (ten crops whose
    outputs are averaged before the softmax). Frames without a detection are streamed
    unannotated.

    The generator runs while the module-level ``switch`` equals 1. When it stops (or if
    ``switch`` is not 1 on entry) the shared ``camera`` is released.

    Args:
        emotion_model: emotion classifier called on a ``(ncrops, c, h, w)`` batch. It is
            put into eval mode here because it is only used for inference.

    Yields:
        bytes: one ``--frame`` multipart chunk containing a JPEG-encoded frame.
    """
    global img

    # 'model.pth' is a relative path, resolved against the working directory at call time.
    # torch.load unpickles arbitrary objects, so only load checkpoints you trust.
    model = torch.load('model.pth', map_location=device)
    model = model.to(device)
    # NOTE(review): the detector's train/eval mode is left as saved because its forward
    # pass is not visible here; confirm whether model.eval() is appropriate.
    emotion_model.eval()

    if switch == 1:
        while switch == 1:
            success, frame = camera.read()
            if not success or frame is None:
                # Camera released or not ready; wait briefly rather than busy-looping.
                time.sleep(0.01)
                continue
            img = frame

            # Clean RGB copy: used as detector input and as the crop source, so the
            # drawn box and the BGR channel order never reach the classifier.
            frame_pil = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

            with torch.no_grad():
                detector_input = input_image(frame_pil).to(device)
                bbox = get_result(model(detector_input))

            # Only annotate when something was detected; previously an empty result
            # produced a (0, 0, 0, 0) box that was still cropped and classified.
            if len(bbox):
                # bbox entries hold normalised xcenter, ycenter, width, height.
                x1, y1, x2, y2 = to_xyxy(bbox)
                ih, iw = img.shape[:2]
                left, top = int(x1 * iw), int(y1 * ih)
                right, bottom = int(x2 * iw), int(y2 * ih)
                img = cv2.rectangle(img, (left, top), (right, bottom), (255, 0, 0), 2)

                # Clamp the crop to the frame and skip boxes that end up empty.
                crop_left, crop_top = max(left, 0), max(top, 0)
                crop_right, crop_bottom = min(right, iw), min(bottom, ih)
                if crop_right > crop_left and crop_bottom > crop_top:
                    face = frame_pil.crop((crop_left, crop_top, crop_right, crop_bottom)).resize((48, 48))
                    crops = test_image_transform(face).to(device)  # (ncrops, c, h, w)

                    with torch.no_grad():
                        logits = emotion_model(crops)
                        # Average the per-crop outputs, then turn them into probabilities.
                        mean_logits = logits.view(logits.size(0), -1).mean(dim=0)
                        class_probs = torch.nn.functional.softmax(mean_logits, dim=0)
                    best_prob, best_idx = class_probs.max(dim=0)

                    cv2.putText(img, '{}. probability:{:.3f}'.format(labels_class[best_idx.item()], best_prob.item()),
                                (int(x2 * iw - 10), bottom), cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 255), 2)

            (flag, encodedImage) = cv2.imencode(".jpg", img)
            if not flag:
                continue

            yield(b'--frame\r\n' b'Content-Type: image/jpeg\r\n\r\n' +
                encodedImage.tobytes() + b'\r\n')

    camera.release()
    cv2.destroyAllWindows()

In [None]:
# Return to the Web folder; the YOLO cell earlier in the notebook changed the working
# directory to the YOLO folder.
# NOTE(review): absolute, machine-specific path.
os.chdir("C:/Users/jixua/OneDrive/Desktop/Machine learning package/Web")
@app.route('/')
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page

@app.route('/video')
def detector():
    """Stream annotated camera frames as a ``multipart/x-mixed-replace`` response."""
    # MediaPipe-based alternative: real_time_detection(emotion_model)
    frame_stream = yolo_detection(emotion_model)
    return Response(frame_stream, mimetype='multipart/x-mixed-replace; boundary=frame')

@app.route('/requests', methods = ['POST', 'GET'])
def capturing():
    """Toggle streaming on a ``STOP/START`` form post and re-render the page.

    A POST whose ``stop`` field equals ``'STOP/START'`` flips the module-level
    ``switch``: when streaming it stops and releases the camera, otherwise it reopens
    camera 0 and starts. Any other request just renders ``index.html``.
    """
    global switch, camera
    if request.method == 'POST' and request.form.get('stop') == 'STOP/START':
        if switch == 1:
            switch = 0
            camera.release()
            cv2.destroyAllWindows()
        else:
            # Release the handle currently held before opening a new one; otherwise
            # the old capture stays open and is leaked when `camera` is rebound.
            camera.release()
            camera = cv2.VideoCapture(0)
            switch = 1
    return render_template('index.html')

# Run the Flask development server (threaded so the page and the video stream can be
# served concurrently). The camera is released in `finally` so the device is freed even
# when the server is interrupted or raises; as before, cleanup also runs when the
# __main__ guard is false.
try:
    if __name__ == '__main__':
        app.run(threaded=True)
finally:
    camera.release()
    cv2.destroyAllWindows()