In [3]:
import os
from glob import glob
from tkinter import Tk, Button, Label, filedialog, messagebox

import librosa
import numpy as np
import sounddevice as sd
# The `df` alias is kept so existing references keep working; `sf` is the
# conventional alias and the one the recording code uses.
import soundfile as df
import soundfile as sf
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC


In [4]:
def load_preprocess_audio(file_path):
  """Turn one audio file into a single 13-element MFCC feature vector.

  Args:
    file_path: Path of the audio file to read with ``librosa.load``.

  Returns:
    A 1-D NumPy array holding the mean of each of the 13 MFCCs, taken
    across the rows of the transposed MFCC matrix.
  """
  # sr=None is passed so librosa does not resample to its default rate.
  signal, sample_rate = librosa.load(file_path, sr=None)
  # Trim the edges of the clip using a 20 dB threshold.
  voiced, _ = librosa.effects.trim(signal, top_db=20)
  coefficients = librosa.feature.mfcc(y=voiced, sr=sample_rate, n_mfcc=13)
  per_frame = coefficients.T
  return np.mean(per_frame, axis=0)

In [6]:
def extract_emotion_label(file_path):
  """Return the emotion name encoded in a dash-separated audio file name.

  The third dash-separated field of the base name is read as an integer
  emotion code (e.g. ``03-01-05-01-02-01-12.wav`` -> code 5 -> "angry").

  Args:
    file_path: Path (or bare name) of the audio file.

  Returns:
    One of "neutral", "calm", "happy", "sad", "angry", "fearful",
    "disgust", "surprised", or "unknown" when the code is not in the table
    or the file name does not carry a numeric third field.
  """
  emotion_map={
      1:"neutral",2:"calm",3:"happy",4:"sad",
      5:"angry",6:"fearful",7:"disgust",
      8:"surprised"
  }
  file_name = os.path.basename(file_path)
  try:
    emotion_code = int(file_name.split('-')[2])
  except (IndexError, ValueError):
    # Not a recognisable name: report it the same way as an unmapped code so
    # callers that filter on "unknown" skip the file instead of crashing.
    return "unknown"
  # Look the code up directly; the previous `emotion_map.get(emotion_map)`
  # used the dict itself as a key and raised TypeError on every call.
  return emotion_map.get(emotion_code,"unknown")

In [7]:
def detect_gender(file_path):
  """Return the speaker's gender encoded in a RAVDESS-style file name.

  RAVDESS names have seven dash-separated numeric fields; the last one is
  the actor ID, and odd-numbered actors are male while even-numbered actors
  are female. (The fourth field, which was read previously, is the emotional
  intensity and says nothing about gender.)

  Args:
    file_path: Path (or bare name) of the audio file.

  Returns:
    "male", "female", or "unknown" for a non-positive actor ID.

  Raises:
    IndexError: The name has fewer than seven dash-separated fields.
    ValueError: The actor field is not an integer.
  """
  file_name = os.path.basename(file_path)
  # Drop the extension first: the actor ID is the last field ("...-12.wav").
  stem = os.path.splitext(file_name)[0]
  actor_id = int(stem.split('-')[6])
  if actor_id < 1:
    return "unknown"
  return 'male' if actor_id % 2 == 1 else 'female'

In [8]:
def train_emotion_model(audio_files,labels):
  """Fit a linear-kernel SVM on features extracted from labelled audio files.

  Args:
    audio_files: Iterable of audio file paths; each is converted to a
      feature vector with ``load_preprocess_audio``.
    labels: Emotion labels, one per entry of ``audio_files``.

  Returns:
    A ``(model, le)`` tuple: the fitted ``SVC`` and the ``LabelEncoder``
    that maps between label strings and the integer classes it was trained on.

  Side effects:
    Prints the accuracy measured on a held-out 20% split.
  """
  feature_matrix = np.array(list(map(load_preprocess_audio, audio_files)))
  encoder = LabelEncoder()
  targets = encoder.fit_transform(np.array(labels))

  # Fixed seed so the train/test partition is the same on every run.
  train_X, test_X, train_y, test_y = train_test_split(
      feature_matrix, targets, test_size=0.2, random_state=42
  )

  classifier = SVC(kernel='linear')
  classifier.fit(train_X, train_y)

  accuracy = accuracy_score(test_y, classifier.predict(test_X))
  print(f"Model Accuracy: {accuracy:.2f}")
  return classifier, encoder

In [9]:
# GUI: Tkinter app for uploading or recording a voice clip and showing the predicted emotion.

In [13]:
class EmotionDetectionApp:
  """Small Tkinter window that predicts the emotion in a voice clip.

  A clip is either picked from disk or recorded with ``sounddevice``.
  ``model`` and ``le`` start as ``None``; the caller must assign a fitted
  classifier and its label encoder before predictions can be made.
  """

  RECORD_SECONDS = 5                     # length of a microphone recording
  RECORD_SAMPLE_RATE = 44100             # sample rate used for recording, in Hz
  RECORDING_PATH = "recorded_voice.wav"  # where the recording is written

  def __init__(self, root):
    """Build the widgets inside ``root`` (a Tk root window)."""
    self.root=root
    self.root.title("Emotion Detection through voice")
    self.model=None
    self.le=None

    Label(root,text="Upload Audio File").pack()
    Button(root,text="Upload voice note", command=self.upload_voice_note).pack()
    Button(root, text="Record Voice", command=self.record_voice).pack()

  def upload_voice_note(self):
    """Ask the user for an audio file and analyse it if one was chosen."""
    # Patterns must be separated by a space; "*.wav*.mp3" was a single glob
    # that matched neither .wav nor .mp3 files.
    file_path = filedialog.askopenfilename(filetypes=[("Audio Files", "*.wav *.mp3")])
    if file_path:
      self.process_audio(file_path)

  def record_voice(self):
    """Record a mono clip from the microphone, save it, and analyse it."""
    duration = self.RECORD_SECONDS
    fs = self.RECORD_SAMPLE_RATE
    messagebox.showinfo("Recording", f"Recording will start now. Please speak into the microphone for {duration} seconds.")
    audio_data = sd.rec(int(duration * fs), samplerate=fs, channels=1)
    sd.wait()  # block until the recording has finished
    file_path = self.RECORDING_PATH
    sf.write(file_path, audio_data,fs)
    self.process_audio(file_path)

  def process_audio(self,file_path):
    """Predict the emotion for ``file_path`` and show it in a dialog.

    Files whose name identifies a non-female speaker are rejected. When the
    name carries no speaker information at all (for example the microphone
    recording), the gender check is skipped rather than crashing.
    """
    try:
      gender = detect_gender(file_path)
    except (IndexError, ValueError):
      # The name does not follow the dataset's dash-separated layout.
      gender = None
    if gender is not None and gender != "female":
      messagebox.showerror("Error", "Please upload a female voice note.")
      return
    features = load_preprocess_audio(file_path)
    # Compare against None explicitly: an estimator's truthiness is not a
    # reliable "is set" test, and both objects are needed to decode a result.
    if self.model is not None and self.le is not None:
      emotion_encoded = self.model.predict([features])
      emotion = self.le.inverse_transform(emotion_encoded)
      messagebox.showinfo("Emotion Detected",f"The Detected emotion is: {emotion[0]}")
    else:
      messagebox.showerror("Error", "Model not trained. Please upload a voice note first.")


In [None]:
if __name__ == "__main__":
  # Train once on the dataset, then hand the fitted model to the GUI.
  dataset_path = "/kaggle/input/ravdess-emotional-speech-audio/*/*.wav"

  # Label every file exactly once and keep (file, label) together so the two
  # lists cannot drift out of alignment. Sorting makes the seeded train/test
  # split reproducible regardless of filesystem listing order.
  labelled = [(file, extract_emotion_label(file)) for file in sorted(glob(dataset_path))]
  labelled = [(file, label) for file, label in labelled if label != "unknown"]
  if not labelled:
    raise FileNotFoundError(f"No labelled .wav files found for pattern: {dataset_path}")
  audio_files = [file for file, _ in labelled]
  labels = [label for _, label in labelled]

  model,le=train_emotion_model(audio_files,labels)

  # The training + GUI sequence used to be pasted twice, which retrained the
  # model and opened a second window after the first one was closed.
  root=Tk()
  app=EmotionDetectionApp(root)
  app.model=model
  app.le=le
  root.mainloop()