# Real-Time Speech Emotion Detection

In [8]:
import tkinter as tk
from tkinter import *
import sounddevice as sd
import numpy as np
import threading
import os
import soundfile as sf
import time
from tensorflow.keras.models import model_from_json
from IPython.display import clear_output
%run functions.ipynb

# Restore the speech-emotion network: read the architecture JSON from disk,
# rebuild the model from it, then attach the saved weights.
with open('model_a1.json', "r") as file:
    loaded_model_json = file.read()
model = model_from_json(loaded_model_json)
model.load_weights('model_weights4.h5')
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


def start_recording():
    """Handle a click on "Start Recording".

    Blanks the result label, makes "Stop" the only clickable button, and
    launches ``record_audio`` on a new thread that is kept in the global
    ``recording_thread``.
    """
    global recording_thread
    # Reset the GUI before any audio is captured.
    label1.configure(foreground="#011638", text="")
    start_button.configure(state=tk.DISABLED)
    stop_button.configure(state=tk.NORMAL)
    # Publish the thread object first, then start it.
    worker = threading.Thread(target=record_audio)
    recording_thread = worker
    worker.start()

    
def stop_recording():
    """Handle a click on "Stop Recording".

    Re-enables "Start", disables "Stop", and clears the global
    ``is_recording`` flag that the loop in ``record_audio`` polls. If a
    recording thread exists, waits a bounded amount of time for it before
    returning control to the GUI.
    """
    global is_recording
    start_button.config(state=tk.NORMAL)
    stop_button.config(state=tk.DISABLED)
    is_recording = False

    # ``recording_thread`` is only created by start_recording; look it up
    # defensively so that running this handler first does not raise NameError.
    worker = globals().get("recording_thread")
    if worker is None:
        return

    # Bounded wait: the worker may still be writing the file or running the
    # model, and this handler must not block the GUI indefinitely.
    worker.join(timeout=0.5)
    time.sleep(0.1)
            

def record_audio():
    """Capture audio until ``is_recording`` is cleared, then classify it.

    Intended as the target of the thread created in ``start_recording``.
    Chunks delivered by the sounddevice callback are collected, joined,
    written to ``sample_audio.wav``, and the absolute path of that file
    (also stored in the global ``filepath``) is passed to
    ``predicted_emotion``. If no audio was delivered at all, a message is
    printed to stderr and nothing is written or classified.
    """
    global is_recording, filepath
    # Local import: the callback reports to sys.stderr, and ``sys`` is not
    # among the imports at the top of this file.
    import sys

    is_recording = True
    sr = 44100
    chunks = []

    # The third argument is named ``time_info`` rather than ``time`` so it
    # does not shadow the ``time`` module inside the callback.
    def callback(indata, frames, time_info, status):
        if status:
            print(status, file=sys.stderr)
        # Copy each buffer before storing it.
        chunks.append(indata.copy())

    # Open the stream at the same rate the file is written with below;
    # otherwise the device default is used and the WAV header may not match
    # the captured samples.
    with sd.InputStream(samplerate=sr, channels=2, dtype=np.int16, callback=callback):
        while is_recording:
            sd.sleep(100)

    # np.concatenate raises on an empty list, so bail out if nothing arrived.
    if not chunks:
        print("No audio was captured; nothing to analyse.", file=sys.stderr)
        return

    recording = np.concatenate(chunks, axis=0)
    filename = "sample_audio.wav"
    sf.write(filename, recording, sr)
    filepath = os.path.abspath(filename)
    predicted_emotion(filepath)

    
def predicted_emotion(filepath):
    """Classify the audio file at ``filepath`` and show the result in ``label1``.

    Features returned by ``extract`` get a leading batch axis and are passed
    to ``model.predict``; the argmax of the output selects an entry of
    ``EMOTIONS_LIST_s``, which is shown capitalized. Any exception is printed
    and the label is set to "Unable to detect" instead.
    """
    try:
        import numpy as np
        feature_vector = extract(filepath, mfcc=True, chroma=True, mel=True)
        batch = np.expand_dims(feature_vector, axis=0)
        scores = model.predict(batch)
        best_index = np.argmax(scores)
        label_text = EMOTIONS_LIST_s[best_index].capitalize()
        label1.configure(foreground="#011638", text=label_text)
        # Clear the notebook cell output (e.g. anything printed during predict).
        clear_output(wait=True)
    except Exception as e:
        print(f"Error detecting emotion: {e}")
        label1.configure(foreground="#011638", text="Unable to detect")

# Speech-emotion class names. predicted_emotion indexes this list with the
# argmax of the speech model's output, so the order must match the order the
# model was trained with (NOTE(review): confirm against the training code).
EMOTIONS_LIST_s = [
    'neutral',
    'calm',
    'happy',
    'sad',
    'angry',
    'fearful',
    'disgust',
    'surprised',
]

# Build the recorder window: a Start button, a Stop button, and a label at
# the bottom that predicted_emotion fills with the detected emotion.
top = tk.Tk()
top.title("Audio")
top.geometry('200x200')

start_button = tk.Button(top, text="Start Recording", command=start_recording)
start_button.pack(pady=10)

# Nothing is being recorded yet, so "Stop" starts out disabled;
# start_recording switches it to NORMAL once a recording is running.
stop_button = tk.Button(
    top, text="Stop Recording", command=stop_recording, state=tk.DISABLED
)
stop_button.pack(pady=10)

# Use the tk-qualified widget class, consistent with the buttons above.
label1 = tk.Label(top, background='#CDCDCD', font=('arial', 10, 'bold'))
label1.pack(side='bottom', expand=True)

# Blocks until the window is closed.
top.mainloop()



# Real-Time Facial Emotion Detection

In [9]:
import cv2
import numpy as np
from tensorflow.keras.models import model_from_json
from IPython.display import clear_output

# Facial-emotion class names. The capture loop indexes this list with the
# argmax of the face model's output, so the order must match the order the
# model was trained with (NOTE(review): confirm against the training code).
EMOTIONS_LIST = [
    "Angry",
    "Disgust",
    "Fear",
    "Happy",
    "Neutral",
    "Sad",
    "Surprise",
]


def Model(json_file, weights_file):
    """Rebuild a Keras model from an architecture file and a weights file.

    Args:
        json_file: Path to the JSON file passed to ``model_from_json``.
        weights_file: Path to the weights file passed to ``load_weights``.

    Returns:
        The model, compiled with the Adam optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    with open(json_file, "r") as handle:
        architecture = handle.read()
    net = model_from_json(architecture)
    net.load_weights(weights_file)
    net.compile(
        optimizer="adam",
        loss=["categorical_crossentropy"],
        metrics=["accuracy"],
    )
    return net

# Load the classifier and the face detector before opening the camera, so a
# failure while loading them cannot leave the capture device held open.
model = Model("model_a.json", "model_weights.h5")
facec = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
cap = cv2.VideoCapture(0)

# Main loop: grab a frame, detect faces, classify each face crop, draw the
# result, and show the frame until 'q' is pressed or no frame can be read.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = facec.detectMultiScale(gray, 1.3, 5)

        for (x, y, w, h) in faces:
            # Crop the face, mirror it, shrink to 48x48 grayscale and scale
            # the pixel values by 1/255.
            roi_frame = frame[y:y+h, x:x+w]
            roi_frame = cv2.flip(roi_frame, 1)
            roi_frame = cv2.resize(roi_frame, (48, 48))
            roi_frame = cv2.cvtColor(roi_frame, cv2.COLOR_BGR2GRAY)
            roi_frame = roi_frame/255.0

            # Add exactly one batch axis and one channel axis -> (1, 48, 48, 1).
            # The channel axis used to be added twice, giving a 5-D array.
            # NOTE(review): assumes the model takes (batch, 48, 48, 1) input;
            # confirm against model_a.json.
            batch = roi_frame[np.newaxis, :, :, np.newaxis]

            cv2.rectangle(frame, (x, y), (x + w, y + h), (0,255,0), 2)
            pred = EMOTIONS_LIST[np.argmax(model.predict(batch))]
            cv2.putText(frame, pred, (x, y - 10), cv2.FONT_HERSHEY_COMPLEX, 0.9, (0,255,0), 2)
            # Clear the notebook cell output (e.g. anything printed during predict).
            clear_output(wait=True)

        # Title corrected: this window shows emotion labels, not drowsiness.
        cv2.imshow('Facial Emotion Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the camera and close the windows, even when an
    # exception (or a kernel interrupt) ends the loop.
    cap.release()
    cv2.destroyAllWindows()

