In [12]:
import cv2
import subprocess
from deepface import DeepFace
import speech_recognition as sr
import unicodedata
import string
import re
import os
import time

In [16]:
cap = cv2.VideoCapture('uploads/test.mp4')
emotion_counts = {
    'angry': 0,
    'disgust': 0,
    'fear': 0,
    'happy': 0,
    'sad': 0,
    'surprise': 0,
    'neutral': 0,
    'no_face': 0,
}
faceCascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

while True:
    # read frame
    ret, frame = cap.read()
    if not ret:
        break
    else:
        # convert to grayscale
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # detect faces in the image
        faces = faceCascade.detectMultiScale(
            gray,
            scaleFactor=1.3,
            minNeighbors=5,
            minSize=(30, 30),
            flags=cv2.CASCADE_SCALE_IMAGE
        )

        # loop over faces
        for (x, y, w, h) in faces:
            # extract face
            face = frame[y:y+h, x:x+w]
            # recognize emotion if a face is detected
            if len(face) > 0:
                try:
                    result = DeepFace.analyze(face, actions=['emotion'], enforce_detection=True)
                    if result[0]['dominant_emotion'] is not None:
                        emotion_counts[result[0]['dominant_emotion']] += 1
                    else:
                        no_count['no_emotion'] += 1
                except ValueError as err:
                    emotion_counts['no_face'] += 1
            # update the no_face count if no face is detected
            else:
                emotion_counts['no_face'] += 1

emotion_counts['angry'] = emotion_counts['angry'] * 0.2
emotion_counts['disgust'] = emotion_counts['disgust'] * 0.2
emotion_counts['fear'] = emotion_counts['fear'] * 0.2
emotion_counts['happy'] = emotion_counts['happy'] * 1.3
emotion_counts['sad'] = emotion_counts['sad'] * 0.2
emotion_counts['neutral'] = emotion_counts['neutral'] * 1
emotion_counts['no_face'] = 0

command = ['ffmpeg', '-i', 'uploads/test.mp4', '-vn', '-acodec', 'pcm_s16le', '-ar', '44100', '-ac', '2', 'uploads/audio.wav']
subprocess.call(command)

r = sr.Recognizer()
audio_file = sr.AudioFile('uploads/audio.wav')
with audio_file as source:
    audio = r.record(source)
    
text = r.recognize_google(audio)


text_list = text.split("for this question")
cleaned_list = []
for text in text_list:
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = re.sub(' +', ' ', text)
    cleaned_list.append(text)
text_list = cleaned_list
text_list = [text for text in text_list if text.strip()]
normalized_list = [unicodedata.normalize('NFKD', text).encode('ASCII', 'ignore').decode('ASCII') for text in text_list]

Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 11.56it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 23.25it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 18.34it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 14.38it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 21.67it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 15.50it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 10.05it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 20.83it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 17.24it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 14.93it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 17.38it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 20.60it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 13.79it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 20.41it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 19.23it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 13.

In [None]:
argmax(emotion_counts)

{'angry': 48.3,
 'disgust': 0.2,
 'fear': 23.0,
 'happy': 20.4,
 'sad': 30.099999999999998,
 'surprise': 0,
 'neutral': 45,
 'no_face': 0}

In [15]:
normalized_list

[' I am wearing a grey colored suit ', ' I am recording this on a laptop']

In [11]:
cap.release()
time.sleep(1)  # Add a 1-second delay

os.remove("uploads/test.mp4")
os.remove("uploads/audio.wav")