In [4]:
import tkinter as tk
from tkinter import filedialog
import keras 
import numpy as np
import librosa
import sounddevice as sd
import wavio

class LivePredictionsApp:
    """Tkinter GUI that predicts the emotion expressed in a speech clip.

    The user either uploads a ``.wav`` file or records a short clip from the
    default input device. The clip is reduced to a vector of time-averaged
    MFCCs, passed to a Keras model, and the predicted emotion is shown in the
    window.

    Args:
        model_path: Path of the Keras model file. Defaults to
            ``DEFAULT_MODEL_PATH`` when omitted.
    """

    # Machine-specific default; pass ``model_path`` to the constructor to override.
    DEFAULT_MODEL_PATH = r'E:\#1 DATA SCIENCE\#4 Deep Learning\Projects\Speech Emotion Detection\SER_MODEL\SER_model.h5'

    SAMPLE_RATE = 44100  # Recording sample rate in Hz
    RECORD_SECONDS = 5  # Duration of a microphone recording
    RECORDING_FILENAME = "recorded_audio.wav"  # Where a recording is saved
    N_MFCC = 40  # Number of MFCC coefficients fed to the model

    # Maps the index of the model's strongest output to an emotion label.
    EMOTION_LABELS = {
        0: 'neutral',
        1: 'calm',
        2: 'happy',
        3: 'sad',
        4: 'angry',
        5: 'fearful',
        6: 'disgust',
        7: 'surprised',
    }

    def __init__(self, model_path=None):
        """Build the window and its widgets; the model is loaded lazily."""
        self.window = tk.Tk()
        self.window.title("Live Speech Emotion Detection")
        self.window.geometry("600x400")
        self.window.configure(bg="#F0F0F0")

        self.label_font = ("Arial", 18, "bold")
        self.button_font = ("Arial", 14)
        self.result_font = ("Arial", 16, "bold")

        self.label = tk.Label(
            self.window,
            text="Upload an audio file:",
            font=self.label_font,
            bg="#F0F0F0",
            fg="#007AFF",
        )
        self.label.pack(pady=20)

        self.upload_button = tk.Button(
            self.window,
            text="Upload",
            command=self.upload_file,
            bg="#007AFF",
            fg="white",
            font=self.button_font,
        )
        self.upload_button.pack(pady=10)

        self.record_button = tk.Button(
            self.window,
            text="Record",
            command=self.record_voice,
            bg="#007AFF",
            fg="white",
            font=self.button_font,
        )
        self.record_button.pack(pady=10)

        # Shows status messages, errors, and the final prediction.
        self.prediction_label = tk.Label(
            self.window,
            text="",
            font=self.result_font,
            bg="#F0F0F0",
            fg="#007AFF",
        )
        self.prediction_label.pack(pady=20)

        self.path = ""  # Path of the audio file currently being analysed
        self.model_path = self.DEFAULT_MODEL_PATH if model_path is None else model_path
        self._model = None  # Cached Keras model, filled on first prediction

    def _load_model(self):
        """Return the Keras model, reading it from disk only on first use."""
        # getattr keeps this usable on instances created without __init__.
        if getattr(self, "_model", None) is None:
            self._model = keras.models.load_model(self.model_path)
        return self._model

    def _show_status(self, text, colour="#007AFF"):
        """Display ``text`` in the result label and repaint immediately.

        The repaint is forced because recording and inference block the Tk
        event loop, so a pending label change would otherwise not be drawn
        until they finish.
        """
        self.prediction_label.config(text=text, fg=colour)
        self.window.update_idletasks()

    def upload_file(self):
        """Ask the user for a ``.wav`` file and run a prediction on it."""
        self.path = filedialog.askopenfilename(
            title="Select an audio file",
            filetypes=[("Audio Files", "*.wav")],
        )
        # An empty path means the dialog was cancelled.
        if self.path:
            self.make_predictions()

    def record_voice(self):
        """Record from the microphone, save it as a wav file, and predict.

        Raises:
            Exception: Whatever the recording or file-writing call raised;
                the error is shown in the window and then re-raised.
        """
        fs = self.SAMPLE_RATE

        self._show_status("Recording...")
        print("Recording...")
        try:
            recording = sd.rec(
                int(fs * self.RECORD_SECONDS),
                samplerate=fs,
                channels=1,
                dtype='int16',
            )
            sd.wait()  # Block until the recording buffer is full
            print("Finished recording.")
            wavio.write(self.RECORDING_FILENAME, recording, fs, sampwidth=2)
        except Exception:
            # UI boundary: tell the user, then let Tk report the traceback.
            self._show_status("Error: recording failed", colour="red")
            raise

        self.path = self.RECORDING_FILENAME
        self.make_predictions()

    def make_predictions(self):
        """Predict the emotion of the clip at ``self.path`` and display it.

        Raises:
            Exception: Whatever audio loading, feature extraction, or the
                model raised; the error is shown in the window and re-raised.
        """
        self._show_status("Analysing...")
        try:
            data, sampling_rate = librosa.load(self.path)
            if data.size == 0:
                self._show_status("Error: audio is empty", colour="red")
                return

            # Average each MFCC coefficient over time -> one value per coefficient.
            mfccs = np.mean(
                librosa.feature.mfcc(y=data, sr=sampling_rate, n_mfcc=self.N_MFCC).T,
                axis=0,
            )
            # Add a leading batch axis and a trailing channel axis.
            x = mfccs[np.newaxis, :, np.newaxis]

            predictions = self._load_model().predict(x)
        except Exception:
            # UI boundary: tell the user, then let Tk report the traceback.
            self._show_status("Error: could not analyse audio", colour="red")
            raise

        predicted_class = int(np.argmax(predictions, axis=1)[0])
        predicted_emotion = self.convert_class_to_emotion(predicted_class)

        self.prediction_label.config(text="Prediction: " + predicted_emotion, fg="#007AFF")

    @staticmethod
    def convert_class_to_emotion(pred):
        """Return the emotion label for class index ``pred``, or 'Unknown'."""
        return LivePredictionsApp.EMOTION_LABELS.get(pred, 'Unknown')

    def run(self):
        """Start the Tk event loop; returns when the window is closed."""
        self.window.mainloop()

# Script entry point: build the GUI and hand control to its event loop.
if __name__ == "__main__":
    LivePredictionsApp().run()


Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
Recording...
Finished recording.
